clear all


set more off 
pause off
set logtype text
set mem 500M

clear all

cd "~/Dropbox (Harvard University)/Monetary Policy with Heterogenous Producers/Replication Code"

ssc install grstyle, replace
grstyle init
grstyle set plain, nogrid
graph set window fontface "Palatino"

*********************************************************************
****************** 		BASIC DATA PREP			*********************
*********************************************************************

import delimited using "data/Compustat/20220401_Compustat_quarterly.csv", clear

* Compute key financial metrics
g dvq = dvy/4
g prstkcq = prstkc/4
g capxq = capxy/4

g meq = cshoq*prccq	 
g beq = atq - ltq - pstkq 	
g bliabq = atq - beq
g blevq = bliabq/atq	
g mvq = meq + atq - beq
g q = mvq/atq
g payaq = (dvq + prstkcq)/ atq
g bbaq = prstkcq / atq
g osq = oiadpq - txtq
g payosq = (dvq + prstkcq)/ osq
g bbosq = prstkcq/ osq
g caq = cheq / atq
g deq = dlttq/(beq+dlttq)

label variable meq "Market Value of Equity"
label variable beq "Book Value of Equity"
label variable bliabq "Book Value of Liabilities"
label variable mvq "Market Value"
label variable q "Tobin's q"
label variable blevq "Leverage"
label variable payaq "Payout/assets"
label variable payosq "Net operating surplus"
label variable payosq "Payout/operating surplus"
label variable caq "Cash holdings / assets"
label variable capxq "Capital expenditures"

save "data/Compustat/Compustat_clean", replace

*** ADD TOTAL Q***
import delimited using "data/Compustat/20220329_TotalQ.csv", clear
rename fyear fyearq
save "data/Compustat/TotalQ_clean", replace

use "data/Compustat/Compustat_clean", clear
merge m:1 gvkey fyearq using "data/Compustat/TotalQ_clean"
drop if _merge==2
drop _merge

g k_pt = ppentq + k_int
g shareintan = k_int/k_pt
g share_offbs = k_int_offbs/k_pt

destring, replace

* Apply missing data filters 
drop if fyearq==.
drop if gvkey==.
drop if datacqtr==""
* Force drop duplicate observations within calendar quarter
sort gvkey datacqtr
duplicates drop gvkey datacqtr, force 
g quarter = quarterly(datacqtr, "YQ")
format quarter %tq
g year = yofd(dofq(quarter))

save "data/Compustat/Compustat_clean", replace

*** Add 10Y rate, 10Y real rate, bond spread, investment price index, and CPI/GDPDEF
import delimited using "data/FRED/deflators.csv", clear
save "data/FRED/deflators", replace

import delimited using "data/FRED/10Yrates_quarterly.csv", clear
rename dgs10 r10 
rename reaintratrearat10y r10_real
merge 1:1 date using "data/FRED/deflators"
drop _merge
replace consdef = consdef / 100
replace invdef = invdef / 100
replace gdpdef = gdpdef /100
g exp_infl_10 = r10 - r10_real
g quarter_date = date(date, "YMD")
g quarter = qofd(quarter_date)
format quarter %tq
tsset quarter
* Impute real rate using historical inflation for years where it is missing
g infl_1y = (log(consdef) - log(L4.consdef)) * 100 / 1
g infl_3y = (log(consdef) - log(L12.consdef)) * 100 / 3
g infl_5y = (log(consdef) - log(L20.consdef)) * 100 / 5
g infl_10y = (log(consdef) - log(L40.consdef)) * 100 / 10
regress exp_infl_10 infl_*y 
predict exp_infl_10pred
replace exp_infl_10 = exp_infl_10pred if exp_infl_10==.
replace r10_real = r10 - exp_infl_10 if r10_real==.
gen bondspread = baa - r10
rename gdpdef p_gdp 
rename invdef p_i 
keep quarter p_gdp p_i r10 r10_real bondspread
g yoy_growth_p_gdp = log(p_gdp) - log(L4.p_gdp)
g yoy_growth_p_i = log(p_i) - log(L4.p_i)
save "data/FRED/quarterly_allFRED_clean", replace

use "data/Compustat/Compustat_clean", clear
merge m:1 quarter using "data/FRED/quarterly_allFRED_clean"
drop if _merge==2
drop _merge
save "data/Compustat/Compustat_clean", replace



****************************************************************************
********************** DEFINE INDUSTRIES **********************
****************************************************************************


***** SIC2-3 and NAICS2-4 SEGMENTS ***** 
use "data/Compustat/Compustat_clean", clear

tostring sic, gen(sicstr)
g sic3 = substr(sicstr, 1, 3)
g sic2 = substr(sicstr, 1, 2)
replace sic3 = "" if strlen(sic3) < 3
replace sic2 = "" if strlen(sic2) < 2
destring sic3, replace
destring sic2, replace

tostring naics, gen(naicsstr)
g naics4 = substr(naicsstr, 1, 4)
g naics3 = substr(naicsstr, 1, 3)
g naics2 = substr(naicsstr, 1, 2)
replace naics4 = "" if strlen(naics4) < 4
replace naics3 = "" if strlen(naics3) < 3
replace naics2 = "" if strlen(naics2) < 2
destring naics4, replace
destring naics3, replace
destring naics2, replace

compress
sort gvkey datacqtr
save "data/Compustat/Compustat_clean", replace

* map NAICS3 using SIC if missing
drop if naics3 == .
collapse (count) gvkey, by (sic naics3)
g ngvkey = -gvkey
sort sic ngvkey
bys sic : keep if _n==1
keep sic naics3
rename naics3 naics3_mapped
drop if sic==.
tostring naics3_mapped, gen(naics3str)
g naics2_mapped = substr(naics3str, 1, 2)
destring naics2_mapped, replace
drop naics3str
save "data/Compustat/sic_naics_xwalk", replace

use "data/Compustat/Compustat_clean", clear
merge m:1 sic using "data/Compustat/sic_naics_xwalk"
replace naics3 = naics3_mapped if naics3==.
replace naics2 = naics2_mapped if naics2==.
drop _merge 
drop naics2_mapped
drop naics3_mapped

drop if sic==.
save "data/Compustat/Compustat_clean", replace

***** BEA Segments *****
*** Mapping from NAICS to BEA data ***
import excel using "data/crosswalks/NAICS4_BEA.xlsx", sheet("naics_bea_final_values") clear firstrow
drop data_count matched_row*
replace bea_des = strtrim(bea_des)
replace indgroupdes = strtrim(indgroupdes)
save "data/crosswalks/naics4_bea", replace

import excel using "data/crosswalks/NAICS4_BEA.xlsx", sheet("naics_bea_final_values") clear firstrow
tostring naics4, gen(naics4_str)
g naics3 = substr(naics4_str, 1, 3)
destring naics3, replace
collapse (sum) data_count, by(bea_code naics3)
gen neg_data_count = -data_count
sort naics3 neg_data_count
duplicates drop naics3, force
drop data_count neg_data_count
rename bea_code bea_code_naics3
save "data/crosswalks/naics3_bea", replace

use "data/Compustat/Compustat_clean", clear
merge m:1 naics4 using "data/crosswalks/naics4_bea"
drop if _merge==2
drop _merge
merge m:1 naics3 using "data/crosswalks/naics3_bea"
drop _merge

replace bea_code = bea_code_naics3 if bea_code==""
drop bea_code_naics3

// Exclude US Postal Service (NAICS 491)
drop if naics3==491

compress
sort gvkey datacqtr
save "data/Compustat/Compustat_clean", replace

*** Merge Industry Depreciation rates
import excel using "data/BEA/DetailNonres_rate.xlsx", sheet("compiled_clean") firstrow clear
drop INDUSTRYTITLE NAICSCodes
reshape long depr, i(bea_code) j(year)
save "data/BEA/bea_code_depreciation_rates", replace

use "data/Compustat/Compustat_clean", clear
merge m:1 bea_code year using "data/BEA/bea_code_depreciation_rates"
drop if _merge==2
* Unmerged are all 2021-2022
drop _merge

sort gvkey datacqtr
xtset gvkey quarter
save "data/Compustat/Compustat_clean", replace

************************************************************
************** FIELDS FOR MARK-UP ESTIMATES  ***************
************************************************************
use "data/Compustat/Compustat_clean", clear

* Basic firm-level metrics
g logatq = log(atq)
g nblevq = (bliabq - cheq)/atq
g txtoiq = txtq/oiadpq
g txdbaq = txdbq/atq
g cfq = ibq + dpq
g xrdatq = xrdq/atq
g xrdsaleq = xrdq/saleq

* Lerner Index
g li = (oibdpq - dpq) / saleq
g mu_li = 1/(1-li )
label variable li "Lerner Index"
label variable mu_li "Markup Lerner Index"
		
* For De Loecker and Eeckhout
g sale09 = saleq/p_gdp
g cogs09 = cogsq/p_gdp
g ppent09 = ppentq/p_i
g ppegt09 = ppegtq/p_i
g k_pt09 = k_pt/p_i
g oibdp09 = oibdpq/p_gdp
g totexp09 = sale09 - oibdp09 

g logsale09 = log(sale09)
g logcogs09 = log(cogs09)
g logtotexp09 = log(totexp09)
g logppent09 = log(ppent09)
g logppegt09 = log(ppegt09)
g logk_pt09 = log(k_pt09)

g cogssale = cogsq/saleq
g totexpsale = totexp09/sale09

xtset gvkey quarter

* FOR CFG
g salek1 = saleq/l.ppentq
g salek2 = saleq/(l.ppentq + l.intanq)
g osk1q = osq/l.ppentq
g osk2q = osq/(l.ppentq + l.intanq)

g osatq = osq/l.atq
g cfatq = cfq/l.atq
g cfkq = cfq/l.ppentq

* Leverage ratio
egen a1_dlttq = sum(dlttq), by(bea_code year) missing
egen a1_beq = sum(beq), by(bea_code year) missing
g a1_de = a1_dltt/(a1_be+a1_dltt)

* Goodwill and intangibles
g gwaq = gdwlq/atq
g intanexgwq = intanq - gdwlq
g intanexgwat = intanexgwq/atq
g intanatq = intanq/atq
label variable gwaq "Goodwill/Assets"

* winsorize ratios
winsor2 mu_li, replace cuts(1 99) by(quarter)

save "data/Compustat/Compustat_clean", replace


******************************************************
************ CABALLERO, FARHI, GOURINCHAS ************
******************************************************

* Add Damodaran ERP estimates 
import excel "data/Damadoran_ERP/histimpl.xls", sheet("Historical Impl Premiums") cellrange("A7") firstrow clear
drop if TBondRate==.
destring, replace
rename TBondRate 			tbond_dam
rename ImpliedPremiumDDM 	erp_damddm 
rename ImpliedERPFCFE 		erp_damfcf
rename Year 				year
keep year erp* tbond_dam
sort year
save "data/Damadoran_ERP/Damadoran_ERP_clean", replace

use "data/Compustat/Compustat_clean", clear
merge m:1 year using "data/Damadoran_ERP/Damadoran_ERP_clean"
drop if _merge==2
drop _merge
sort gvkey quarter
xtset gvkey quarter

* Capital: Perpetual Inventory Method 
sort gvkey quarter
xtset gvkey quarter
g cap = 0
replace cap = ppegtq
replace cap = L.cap + (ppentq-L.ppentq) if !missing(L.cap) & !missing(ppentq) & !missing(L.ppentq)
g log_k = log(cap)

sort gvkey quarter
xtset gvkey quarter
by gvkey: ipolate intanq quarter, generate(intanq_interp)
g cap_with_intan = cap + intanq_interp
replace cap_with_intan = cap if cap_with_intan==.

*** Compute profit shares ***
g rk_damfcf = erp_damfcf + tbond_dam
g mixrate_dam = a1_de * (r10 + bondspread) + (1-a1_de)*(rk_damfcf)
g usercost_rate = mixrate_dam/100 - (1-depr)*(yoy_growth_p_i - yoy_growth_p_gdp)
g kcost_1 = usercost_rate * L.cap
g kcost_2 = usercost_rate * (L.cap_with_intan)
g mu_cfg1_dam = 1 / (1 - (osq - kcost_1)/saleq)
g mu_cfg2_dam = 1 / (1 - (osq - kcost_2)/saleq)

foreach X in mu_cfg1_dam mu_cfg2_dam{
	winsor2 `X', replace cuts(1 99) by(quarter) 
}

save "data/Compustat/Compustat_clean", replace

******************************************************
************ Quarterly estimates ************
******************************************************

* Since we are using quarterly data, to ensure seasonality does not affect results
* we will calculate markups using cumulative values over the last four quarters

use "data/Compustat/Compustat_clean", clear

* Lerner Index
xtset gvkey quarter
local cum_var_list sale oibdp dp os
foreach x in `cum_var_list' {
	g `x'_ms4 = `x'q + L1.`x'q + L2.`x'q + L3.`x'q
}

g li_ms4 = (oibdp_ms4 - dp_ms4) / sale_ms4
g mu_li_ms4 = 1/(1-li_ms4)
winsor2 mu_li_ms4, replace cuts(1 99) by(quarter)

g mu_cfg1_ms4 = 1 / (1 - (os_ms4 - kcost_1)/sale_ms4)
g mu_cfg2_ms4 = 1 / (1 - (os_ms4 - kcost_2)/sale_ms4)
g sale_ms4_win = sale_ms4

foreach X in mu_cfg1_ms4 mu_cfg2_ms4 sale_ms4_win {
	winsor2 `X', replace cuts(1 99) by(quarter) 
}


save "data/Compustat/Compustat_clean", replace

